# Decode wordpiece tokens back into whole words.
import sys
from tools.wpm_decode import utt_id

def _decode_line(line):
    """Convert one wordpiece line into ``"<utt_id> <words>"``.

    The first whitespace-separated field is kept as the utterance id. The
    remaining fields are concatenated without separators and every ``_`` is
    turned into a space, which restores the word boundaries.

    Args:
        line: One raw line of the input file (trailing newline allowed).

    Returns:
        The decoded line without a trailing newline, or ``None`` when the
        line holds no fields at all (blank / whitespace-only).
    """
    fields = line.split()
    if not fields:
        # Blank line: nothing to decode, and fields[0] would raise IndexError.
        return None
    key, pieces = fields[0], fields[1:]
    # A leading "_" on the first piece becomes a leading space after the
    # replace; strip it so the id and the text are separated by one space.
    words = ''.join(pieces).replace('_', ' ').strip()
    return key + ' ' + words


def main(argv):
    """Decode the wordpiece file ``argv[1]`` into the word file ``argv[2]``.

    Args:
        argv: Command-line vector; ``argv[1]`` is the input path and
            ``argv[2]`` is the output path. Extra entries are ignored.

    Raises:
        SystemExit: If fewer than two paths are supplied.
    """
    if len(argv) < 3:
        sys.exit('usage: <script> <text_in> <text_out>')
    text_in, text_out = argv[1], argv[2]

    with open(text_in, 'r', encoding='utf-8') as f, \
            open(text_out, 'w', encoding='utf-8') as w:
        for line in f:
            decoded = _decode_line(line)
            if decoded is not None:
                w.write(decoded + '\n')


if __name__ == '__main__':
    main(sys.argv)